#!/usr/bin/env python

"""
<Program Name>
  formats.py

<Author>
  Geremy Condra
  Vladimir Diaz <vladimir.v.diaz@gmail.com>

<Started>
  Refactored April 30, 2012. -vladimir.v.diaz

<Copyright>
  See LICENSE for licensing information.

<Purpose>
  A central location for all format-related checking of TUF objects.
  Note: 'formats.py' depends heavily on 'schema.py', so the 'schema.py'
  module should be read and understood before tackling this module.

  'formats.py' can be broken down into two sections.  (1) Schemas and object
  matching.  (2) Functions that help produce or verify TUF objects.

  The first section deals with schemas and object matching based on format.
  There are two ways of checking the format of objects.  The first method
  raises a 'tuf.FormatError' exception if the match fails and the other
  returns a Boolean result.

  tuf.formats.<SCHEMA>.check_match(object)
  tuf.formats.<SCHEMA>.matches(object)

  Example:

  rsa_key = {'keytype': 'rsa',
             'keyid': '34892fc465ac76bc3232fab',
             'keyval': {'public': 'public_key',
                        'private': 'private_key'}}

  tuf.formats.RSAKEY_SCHEMA.check_match(rsa_key)
  tuf.formats.RSAKEY_SCHEMA.matches(rsa_key)

  In this example, if a dict key or dict value is missing or incorrect,
  the match fails.  There are numerous variations of object checking
  provided by 'formats.py' and 'schema.py'.

  The second section contains miscellaneous functions related to the format of
  TUF objects.
  Example: 
  
  signable_object = make_signable(unsigned_object)
"""

# Help with Python 3 compatibility, where the print statement is a function, an
# implicit relative import is invalid, and the '/' operator performs true
# division.  Example:  print 'hello world' raises a 'SyntaxError' exception.
from __future__ import print_function
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals

import binascii
import calendar
import re
import string
import datetime
import time

import tuf
import tuf.schema as SCHEMA

import six

# Note that in the schema definitions below, the 'SCHEMA.Object' types allow
# additional keys which are not defined. Thus, any additions to them will be
# easily backwards compatible with clients that are already deployed.

# A datetime in 'YYYY-MM-DDTHH:MM:SSZ' ISO 8601 format.  The "Z" zone designator
# for the zero UTC offset is always used (i.e., a numerical offset is not
# supported.)  Example: '2015-10-21T13:20:00Z'.  Note:  This is a simple format
# check (the pattern only constrains the number of digits in each field), and
# an ISO8601 string should be fully verified when it is parsed.
ISO8601_DATETIME_SCHEMA = SCHEMA.RegularExpression(r'\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z')

# A Unix/POSIX time format.  An integer representing the number of seconds
# since the epoch (January 1, 1970.)  'hi' is set to 2147483647 (2**31 - 1),
# the largest value of a signed 32-bit integer, which corresponds to a date in
# the year 2038.
# NOTE(review): The role schemas defined in this module declare their 'expires'
# field as ISO8601_DATETIME_SCHEMA rather than as this schema.
UNIX_TIMESTAMP_SCHEMA = SCHEMA.Integer(lo=0, hi=2147483647)

# A hexadecimal value in '23432df87ab..' format: one or more hexadecimal
# digits, in upper or lower case.
HASH_SCHEMA = SCHEMA.RegularExpression(r'[a-fA-F0-9]+')

# A dict in {'sha256': '23432df87ab..', 'sha512': '34324abc34df..', ...} format.
# The dict keys are declared as any string, and the dict values as HASH_SCHEMA.
HASHDICT_SCHEMA = SCHEMA.DictOf(
  key_schema = SCHEMA.AnyString(),
  value_schema = HASH_SCHEMA)

# A hexadecimal value in '23432df87ab..' format.  Uses the same pattern as
# HASH_SCHEMA, but is a separate schema object.
HEX_SCHEMA = SCHEMA.RegularExpression(r'[a-fA-F0-9]+')

# A key identifier (e.g., a hexadecimal value identifying an RSA key).  This is
# the HASH_SCHEMA object under another name.
KEYID_SCHEMA = HASH_SCHEMA

# A list of KEYID_SCHEMA.
KEYIDS_SCHEMA = SCHEMA.ListOf(KEYID_SCHEMA)

# The method used for a generated signature (e.g., 'RSASSA-PSS').
SIG_METHOD_SCHEMA = SCHEMA.AnyString()

# A relative file path (e.g., 'metadata/root/'), and a list of such paths.
RELPATH_SCHEMA = SCHEMA.AnyString()
RELPATHS_SCHEMA = SCHEMA.ListOf(RELPATH_SCHEMA)

# An absolute path, and a list of such paths.
PATH_SCHEMA = SCHEMA.AnyString()
PATHS_SCHEMA = SCHEMA.ListOf(PATH_SCHEMA)

# Uniform Resource Locator identifier (e.g., 'https://www.updateframework.com/').
URL_SCHEMA = SCHEMA.AnyString()

# A dictionary holding version information.  Each of the 'major', 'minor', and
# 'fix' entries is an integer with a lower bound of 0.
VERSION_SCHEMA = SCHEMA.Object(
  object_name = 'VERSION_SCHEMA',
  major = SCHEMA.Integer(lo=0),
  minor = SCHEMA.Integer(lo=0),
  fix = SCHEMA.Integer(lo=0))

# An integer representing the numbered version of a metadata file.
# The lower bound declared here is 0.
# NOTE(review): This comment previously read "Must be 1, or greater", which
# disagrees with 'lo=0' below -- confirm which of the two is intended.
METADATAVERSION_SCHEMA = SCHEMA.Integer(lo=0)

# An integer representing length.  Must be 0, or greater.
LENGTH_SCHEMA = SCHEMA.Integer(lo=0)

# An integer representing logger levels, such as logging.CRITICAL (=50).
# Must be between 0 and 50.
LOGLEVEL_SCHEMA = SCHEMA.Integer(lo=0, hi=50)

# A string representing a named object, and a list of such names.
NAME_SCHEMA = SCHEMA.AnyString()
NAMES_SCHEMA = SCHEMA.ListOf(NAME_SCHEMA)

# A byte string representing data.
DATA_SCHEMA = SCHEMA.AnyBytes()

# Supported hash algorithms.  A list whose items are each one of: 'md5',
# 'sha1', 'sha224', 'sha256', 'sha384', or 'sha512'.
HASHALGORITHMS_SCHEMA = SCHEMA.ListOf(SCHEMA.OneOf(
  [SCHEMA.String('md5'), SCHEMA.String('sha1'),
   SCHEMA.String('sha224'), SCHEMA.String('sha256'),
   SCHEMA.String('sha384'), SCHEMA.String('sha512')]))

# The contents of an encrypted TUF key.  Encrypted TUF keys are saved to files
# in this format.
ENCRYPTEDKEY_SCHEMA = SCHEMA.AnyBytes()

# A value that is either True or False, on or off, etc.
BOOLEAN_SCHEMA = SCHEMA.Boolean()

# A role's threshold value (i.e., the minimum number
# of signatures required to sign a metadata file).
# Must be 1, or greater.
THRESHOLD_SCHEMA = SCHEMA.Integer(lo=1)

# A string representing a role's name.
ROLENAME_SCHEMA = SCHEMA.AnyString()

# The minimum number of bits for an RSA key.  Must be 2048 bits, or greater
# (recommended by TUF). Crypto modules like 'pycrypto_keys.py' may set further
# restrictions on keys (e.g., the number of bits must be a multiple of 256).
# Recommended RSA key sizes:
# http://www.emc.com/emc-plus/rsa-labs/historical/twirl-and-rsa-key-size.htm#table1
RSAKEYBITS_SCHEMA = SCHEMA.Integer(lo=2048)

# The number of hashed bins, or the number of delegated roles.  See
# delegate_hashed_bins() in 'repository_tool.py' for an example.  Note:
# Tools may require further restrictions on the number of bins, such
# as requiring them to be a power of 2.  Must be 1, or greater.
NUMBINS_SCHEMA = SCHEMA.Integer(lo=1)

# A PyCrypto signature.
PYCRYPTOSIGNATURE_SCHEMA = SCHEMA.AnyBytes()

# A pyca-cryptography signature.
PYCACRYPTOSIGNATURE_SCHEMA = SCHEMA.AnyBytes()

# An RSA key in PEM format.
PEMRSA_SCHEMA = SCHEMA.AnyString()

# A string representing a password.
PASSWORD_SCHEMA = SCHEMA.AnyString()

# A list of passwords.
PASSWORDS_SCHEMA = SCHEMA.ListOf(PASSWORD_SCHEMA)

# The actual values of a key, as opposed to meta data such as a key type and
# key identifier ('rsa', 233df889cb).  For RSA keys, the key value is a pair of
# public and private keys in PEM Format stored as strings.  The 'private'
# entry is declared as optional.
KEYVAL_SCHEMA = SCHEMA.Object(
  object_name = 'KEYVAL_SCHEMA',
  public = SCHEMA.AnyString(),
  private = SCHEMA.Optional(SCHEMA.AnyString()))

# Supported TUF key types: 'rsa' and 'ed25519'.
KEYTYPE_SCHEMA = SCHEMA.OneOf(
  [SCHEMA.String('rsa'), SCHEMA.String('ed25519')])

# A generic TUF key.  All TUF keys should be saved to metadata files in this
# format.  Note that 'keytype' is declared as any string here (it is not
# restricted to KEYTYPE_SCHEMA), and that no 'keyid' entry is declared.
KEY_SCHEMA = SCHEMA.Object(
  object_name = 'KEY_SCHEMA',
  keytype = SCHEMA.AnyString(),
  keyval = KEYVAL_SCHEMA,
  expires = SCHEMA.Optional(ISO8601_DATETIME_SCHEMA))

# A TUF key object.  This schema simplifies validation of keys that may be one
# of the supported key types.  Supported key types: 'rsa', 'ed25519'.
ANYKEY_SCHEMA = SCHEMA.Object(
  object_name = 'ANYKEY_SCHEMA',
  keytype = KEYTYPE_SCHEMA,
  keyid = KEYID_SCHEMA,
  keyid_hash_algorithms = SCHEMA.Optional(HASHALGORITHMS_SCHEMA),
  keyval = KEYVAL_SCHEMA,
  expires = SCHEMA.Optional(ISO8601_DATETIME_SCHEMA))

# A list of TUF key objects.
ANYKEYLIST_SCHEMA = SCHEMA.ListOf(ANYKEY_SCHEMA)

# An RSA TUF key.  Like ANYKEY_SCHEMA, but 'keytype' must be 'rsa' and no
# 'expires' entry is declared.
RSAKEY_SCHEMA = SCHEMA.Object(
  object_name = 'RSAKEY_SCHEMA',
  keytype = SCHEMA.String('rsa'),
  keyid = KEYID_SCHEMA,
  keyid_hash_algorithms = SCHEMA.Optional(HASHALGORITHMS_SCHEMA),
  keyval = KEYVAL_SCHEMA)

# An ED25519 raw public key, which must be 32 bytes.
ED25519PUBLIC_SCHEMA = SCHEMA.LengthBytes(32)

# An ED25519 raw seed key, which must be 32 bytes.
ED25519SEED_SCHEMA = SCHEMA.LengthBytes(32)

# An ED25519 raw signature, which must be 64 bytes.
ED25519SIGNATURE_SCHEMA = SCHEMA.LengthBytes(64)

# Required installation libraries expected by the repository tools and other
# cryptography modules.  A list whose items are each one of: 'general',
# 'ed25519', or 'rsa'.
REQUIRED_LIBRARIES_SCHEMA = SCHEMA.ListOf(SCHEMA.OneOf(
  [SCHEMA.String('general'), SCHEMA.String('ed25519'), SCHEMA.String('rsa')]))

# An ed25519 TUF key.  Like ANYKEY_SCHEMA, but 'keytype' must be 'ed25519' and
# no 'expires' entry is declared.
ED25519KEY_SCHEMA = SCHEMA.Object(
  object_name = 'ED25519KEY_SCHEMA',
  keytype = SCHEMA.String('ed25519'),
  keyid = KEYID_SCHEMA,
  keyid_hash_algorithms = SCHEMA.Optional(HASHALGORITHMS_SCHEMA),
  keyval = KEYVAL_SCHEMA)

# Information about target files, like file length and file hash(es).  This
# schema allows the storage of multiple hashes for the same file (e.g., sha256
# and sha512 may be computed for the same file and stored).  The 'version' and
# 'custom' entries are declared as optional.
FILEINFO_SCHEMA = SCHEMA.Object(
  object_name = 'FILEINFO_SCHEMA',
  length = LENGTH_SCHEMA,
  hashes = HASHDICT_SCHEMA,
  version = SCHEMA.Optional(METADATAVERSION_SCHEMA),
  custom = SCHEMA.Optional(SCHEMA.Object()))

# Version information specified in "snapshot.json" for each role available on
# the TUF repository.  The 'FILEINFO_SCHEMA' object was previously listed in
# the snapshot role, but was switched to this object format to reduce the
# amount of metadata that needs to be downloaded.  Listing version numbers in
# "snapshot.json" also prevents rollback attacks for roles that clients have
# not downloaded.
VERSIONINFO_SCHEMA = SCHEMA.Object(
  object_name = 'VERSIONINFO_SCHEMA',
  version = METADATAVERSION_SCHEMA)

# A dict holding the version or file information for a particular metadata
# role.  The dict keys hold the relative file paths, and the dict values the
# corresponding version numbers and/or file information (i.e., either
# VERSIONINFO_SCHEMA or FILEINFO_SCHEMA).
FILEINFODICT_SCHEMA = SCHEMA.DictOf(
  key_schema = RELPATH_SCHEMA,
  value_schema = SCHEMA.OneOf([VERSIONINFO_SCHEMA, FILEINFO_SCHEMA]))

# A dict holding the information for a particular target / file.  The dict keys
# hold the relative file paths, and the dict values the corresponding file
# information.
FILEDICT_SCHEMA = SCHEMA.DictOf(
  key_schema = RELPATH_SCHEMA,
  value_schema = FILEINFO_SCHEMA)

# A dict holding a target file: its relative path ('filepath') and its file
# information ('fileinfo').
TARGETFILE_SCHEMA = SCHEMA.Object(
  object_name = 'TARGETFILE_SCHEMA',
  filepath = RELPATH_SCHEMA,
  fileinfo = FILEINFO_SCHEMA)

# A list of TARGETFILE_SCHEMA.
TARGETFILES_SCHEMA = SCHEMA.ListOf(TARGETFILE_SCHEMA)

# A single signature of an object.  Indicates the signature, the ID of the
# signing key, and the signing method.
# I debated making the signature schema not contain the key ID and instead have
# the signatures of a file be a dictionary with the key being the keyid and the
# value being the signature schema without the keyid. That would be under
# the argument that a key should only be able to sign a file once. However,
# one can imagine that maybe a key wants to sign multiple times with different
# signature methods.
SIGNATURE_SCHEMA = SCHEMA.Object(
  object_name = 'SIGNATURE_SCHEMA',
  keyid = KEYID_SCHEMA,
  method = SIG_METHOD_SCHEMA,
  sig = HEX_SCHEMA)

# List of SIGNATURE_SCHEMA.
SIGNATURES_SCHEMA = SCHEMA.ListOf(SIGNATURE_SCHEMA)

# A schema holding the result of checking the signatures of a particular
# 'SIGNABLE_SCHEMA' role.
# For example, how many of the signatures for the 'Targets' role are
# valid?  This SCHEMA holds this information.  See 'sig.py' for
# more information.  Each '*_sigs' entry is a list of keyids.
# Note: 'threshold' is declared as an unbounded integer here, rather than as
# THRESHOLD_SCHEMA.
SIGNATURESTATUS_SCHEMA = SCHEMA.Object(
  object_name = 'SIGNATURESTATUS_SCHEMA',
  threshold = SCHEMA.Integer(),
  good_sigs = KEYIDS_SCHEMA,
  bad_sigs = KEYIDS_SCHEMA,
  unknown_sigs = KEYIDS_SCHEMA,
  untrusted_sigs = KEYIDS_SCHEMA,
  unknown_method_sigs = KEYIDS_SCHEMA)

# A signable object.  Holds the signing role and its associated signatures.
# The 'signed' entry may be any object; 'signatures' is a list of
# SIGNATURE_SCHEMA.
SIGNABLE_SCHEMA = SCHEMA.Object(
  object_name = 'SIGNABLE_SCHEMA',
  signed = SCHEMA.Any(),
  signatures = SCHEMA.ListOf(SIGNATURE_SCHEMA))

# A dictionary where the dict keys hold a keyid and the dict values a key
# object (in KEY_SCHEMA format).
KEYDICT_SCHEMA = SCHEMA.DictOf(
  key_schema = KEYID_SCHEMA,
  value_schema = KEY_SCHEMA)

# The format used by the key database to store keys.  The dict keys hold a key
# identifier and the dict values any object.  The key database should store
# key objects in the values (e.g., 'RSAKEY_SCHEMA', 'ED25519KEY_SCHEMA').
KEYDB_SCHEMA = SCHEMA.DictOf(
  key_schema = KEYID_SCHEMA,
  value_schema = SCHEMA.Any())

# The format of the resulting "scp config dict" after extraction from the
# push configuration file (i.e., push.cfg).  In the case of a config file
# utilizing the scp transfer module, it must contain the 'general' and 'scp'
# sections, where 'general' declares the 'transfer_module' (which must be
# 'scp'), 'metadata_path', and 'targets_directory' entries, and 'scp' the
# 'host', 'user', 'identity_file', and 'remote_directory' entries.  See
# 'tuf/pushtools/pushtoolslib.py' and 'tuf/pushtools/push.py'.
SCPCONFIG_SCHEMA = SCHEMA.Object(
  object_name = 'SCPCONFIG_SCHEMA',
  general = SCHEMA.Object(
    object_name = '[general]',
    transfer_module = SCHEMA.String('scp'),
    metadata_path = PATH_SCHEMA,
    targets_directory = PATH_SCHEMA),
  scp=SCHEMA.Object(
    object_name = '[scp]',
    host = URL_SCHEMA,
    user = NAME_SCHEMA,
    identity_file = PATH_SCHEMA,
    remote_directory = PATH_SCHEMA))

# The format of the resulting "receive config dict" after extraction from the
# receive configuration file (i.e., receive.cfg).  The receive config file
# must contain a 'general' section, and this section the 'pushroots' (a list
# of paths), 'repository_directory', 'metadata_directory',
# 'targets_directory', and 'backup_directory' entries.
# See 'tuf/pushtools/pushtoolslib.py' and 'tuf/pushtools/receive/receive.py'.
RECEIVECONFIG_SCHEMA = SCHEMA.Object(
  object_name = 'RECEIVECONFIG_SCHEMA', general=SCHEMA.Object(
    object_name = '[general]',
    pushroots = SCHEMA.ListOf(PATH_SCHEMA),
    repository_directory = PATH_SCHEMA,
    metadata_directory = PATH_SCHEMA,
    targets_directory = PATH_SCHEMA,
    backup_directory = PATH_SCHEMA)) 

# A path hash prefix is a hexadecimal string.  This is the HEX_SCHEMA object
# under another name.
PATH_HASH_PREFIX_SCHEMA = HEX_SCHEMA

# A list of path hash prefixes.
PATH_HASH_PREFIXES_SCHEMA = SCHEMA.ListOf(PATH_HASH_PREFIX_SCHEMA)

# THIS SCHEMA is used for a variety of different things, like delegation
# metadata and the list of keyids and threshold for top-level roles in root
# metadata.  That's... not good.
# Role object in {'keyids': [keyids..], 'name': 'ABC', 'threshold': 1,
# 'paths':[filepaths..]} format.  Only 'keyids' and 'threshold' are declared
# without SCHEMA.Optional; every other entry is declared as optional.
ROLE_SCHEMA = SCHEMA.Object(
  object_name = 'ROLE_SCHEMA',
  name = SCHEMA.Optional(ROLENAME_SCHEMA),
  keyids = KEYIDS_SCHEMA,
  threshold = THRESHOLD_SCHEMA,
  backtrack = SCHEMA.Optional(BOOLEAN_SCHEMA),
  paths = SCHEMA.Optional(RELPATHS_SCHEMA),
  path_hash_prefixes = SCHEMA.Optional(PATH_HASH_PREFIXES_SCHEMA))

# A dict of roles where the dict keys are role names and the dict values hold
# the role data/information.
ROLEDICT_SCHEMA = SCHEMA.DictOf(
  key_schema = ROLENAME_SCHEMA,
  value_schema = ROLE_SCHEMA)

# A dictionary of ROLEDICT_SCHEMA, where the dictionary keys can be repository
# names, and the dictionary values contain information for each role available
# on the repository named by the corresponding dictionary key.
ROLEDICTDB_SCHEMA = SCHEMA.DictOf(
  key_schema = NAME_SCHEMA,
  value_schema = ROLEDICT_SCHEMA)

# Like ROLEDICT_SCHEMA, except that ROLE_SCHEMA instances are stored in order.
ROLELIST_SCHEMA = SCHEMA.ListOf(ROLE_SCHEMA)

# Simply a list of role names, to be used in MULTI_ROLE_DELEGATION_SCHEMA.
ROLENAMELIST_SCHEMA = SCHEMA.ListOf(ROLENAME_SCHEMA)

# A multi-role delegation (MRD).  Unlike ROLE_SCHEMA, it declares no 'name',
# 'keyids', or 'threshold' entries (see the commented-out lines below); it
# lists the names of the required roles instead.
MULTI_ROLE_DELEGATION_SCHEMA = SCHEMA.Object(
# Multi-role delegation object in {'required_roles': [rolenames..],
# 'paths':[filepaths..], ...} format.
  object_name = 'MULTI_ROLE_DELEGATION_SCHEMA',
  #name = SCHEMA.Optional(ROLENAME_SCHEMA), #MRDs don't have names.
  #keyids = KEYIDS_SCHEMA,
  required_roles = ROLENAMELIST_SCHEMA,
  #threshold = THRESHOLD_SCHEMA,
  backtrack = SCHEMA.Optional(BOOLEAN_SCHEMA),
  # If target info differs among the required roles, throw error or allow
  # backtracking?
  abort_on_disagreement = SCHEMA.Optional(BOOLEAN_SCHEMA),
  paths = SCHEMA.Optional(RELPATHS_SCHEMA))
  #path_hash_prefixes = SCHEMA.Optional(PATH_HASH_PREFIXES_SCHEMA)) # TODO

# A list of MULTI_ROLE_DELEGATION_SCHEMA.
MULTI_ROLE_DELEGATION_LIST_SCHEMA = SCHEMA.ListOf(MULTI_ROLE_DELEGATION_SCHEMA)

# The delegated roles of a Targets role (a parent): the delegated keys, the
# ordered list of delegated roles, and, optionally, a list of multi-role
# delegations.  Note that no 'object_name' is given for this schema.
DELEGATIONS_SCHEMA = SCHEMA.Object(
  keys = KEYDICT_SCHEMA,
  roles = ROLELIST_SCHEMA,
  multiroledelegations = SCHEMA.Optional(MULTI_ROLE_DELEGATION_LIST_SCHEMA))

# Supported compression extension (e.g., 'gz').  The empty string is also
# listed as a permitted value.
COMPRESSION_SCHEMA = SCHEMA.OneOf([SCHEMA.String(''), SCHEMA.String('gz')])

# List of supported compression extensions.  Each item is declared with the
# same two choices as COMPRESSION_SCHEMA ('' or 'gz').
COMPRESSIONS_SCHEMA = SCHEMA.ListOf(
  SCHEMA.OneOf([SCHEMA.String(''), SCHEMA.String('gz')]))

# Custom data about a target, such as file permissions, author(s), last
# modified, etc., as specified in the repository and developer tools.  No
# particular entries are declared for this object.
CUSTOM_SCHEMA = SCHEMA.Object()

# A dict where the dict keys hold relative file paths, and the dict values the
# custom data (CUSTOM_SCHEMA) of the corresponding path.
PATH_FILEINFO_SCHEMA = SCHEMA.DictOf(
  key_schema = RELPATH_SCHEMA,
  value_schema = CUSTOM_SCHEMA)

# The format of a role's entry in tuf.roledb.  Only 'keyids' and 'threshold'
# are declared without SCHEMA.Optional.  'paths' may be either a list of
# relative paths or a PATH_FILEINFO_SCHEMA dict.
ROLEDB_SCHEMA = SCHEMA.Object(
  object_name = 'ROLEDB_SCHEMA',
  keyids = KEYIDS_SCHEMA,
  signing_keyids = SCHEMA.Optional(KEYIDS_SCHEMA),
  threshold = THRESHOLD_SCHEMA,
  version = SCHEMA.Optional(METADATAVERSION_SCHEMA),
  expires = SCHEMA.Optional(ISO8601_DATETIME_SCHEMA),
  signatures = SCHEMA.Optional(SIGNATURES_SCHEMA),
  compressions = SCHEMA.Optional(COMPRESSIONS_SCHEMA),
  paths = SCHEMA.Optional(SCHEMA.OneOf([RELPATHS_SCHEMA, PATH_FILEINFO_SCHEMA])),
  path_hash_prefixes = SCHEMA.Optional(PATH_HASH_PREFIXES_SCHEMA),
  delegations = SCHEMA.Optional(DELEGATIONS_SCHEMA),
  partial_loaded = SCHEMA.Optional(BOOLEAN_SCHEMA))

# Root role: indicates root keys and top-level roles.  The '_type' entry must
# be the string 'Root'.
ROOT_SCHEMA = SCHEMA.Object(
  object_name = 'ROOT_SCHEMA',
  _type = SCHEMA.String('Root'),
  version = METADATAVERSION_SCHEMA,
  consistent_snapshot = BOOLEAN_SCHEMA,
  compression_algorithms = COMPRESSIONS_SCHEMA,
  expires = ISO8601_DATETIME_SCHEMA,
  keys = KEYDICT_SCHEMA,
  roles = ROLEDICT_SCHEMA)

# Targets role: Indicates targets and delegates target paths to other roles.
# The '_type' entry must be the string 'Targets'; 'delegations' is optional.
TARGETS_SCHEMA = SCHEMA.Object(
  object_name = 'TARGETS_SCHEMA',
  _type = SCHEMA.String('Targets'),
  version = METADATAVERSION_SCHEMA,
  expires = ISO8601_DATETIME_SCHEMA,
  targets = FILEDICT_SCHEMA,
  delegations = SCHEMA.Optional(DELEGATIONS_SCHEMA))

# Snapshot role: indicates the latest versions of all metadata (except timestamp).
# The '_type' entry must be the string 'Snapshot'.  Each 'meta' value may hold
# either version information or file information (see FILEINFODICT_SCHEMA).
SNAPSHOT_SCHEMA = SCHEMA.Object(
  object_name = 'SNAPSHOT_SCHEMA',
  _type = SCHEMA.String('Snapshot'),
  version = METADATAVERSION_SCHEMA,
  expires = ISO8601_DATETIME_SCHEMA,
  meta = FILEINFODICT_SCHEMA)

# Timestamp role: indicates the latest version of the snapshot file.
# The '_type' entry must be the string 'Timestamp'.  Each 'meta' value holds
# file information (see FILEDICT_SCHEMA).
TIMESTAMP_SCHEMA = SCHEMA.Object(
  object_name = 'TIMESTAMP_SCHEMA',
  _type = SCHEMA.String('Timestamp'),
  version = METADATAVERSION_SCHEMA,
  expires = ISO8601_DATETIME_SCHEMA,
  meta = FILEDICT_SCHEMA)

# project.cfg file: stores information about the project in a json dictionary.
# 'layout_type' must be either 'repo-like' or 'flat'.
# NOTE(review): 'threshold' is limited to the range 0 to 2 here, unlike
# THRESHOLD_SCHEMA (1, or greater) -- confirm that this range is intended.
PROJECT_CFG_SCHEMA = SCHEMA.Object(
    object_name = 'PROJECT_CFG_SCHEMA',
    project_name = SCHEMA.AnyString(),
    layout_type = SCHEMA.OneOf([SCHEMA.String('repo-like'), SCHEMA.String('flat')]),
    targets_location = PATH_SCHEMA,
    metadata_location = PATH_SCHEMA,
    prefix = PATH_SCHEMA,
    public_keys = KEYDICT_SCHEMA,
    threshold = SCHEMA.Integer(lo = 0, hi = 2)
    )

# A schema containing information a repository mirror may require,
# such as a url, the path of the directory metadata files, etc.
# The 'confined_target_dirs' and 'custom' entries are declared as optional.
MIRROR_SCHEMA = SCHEMA.Object(
  object_name = 'MIRROR_SCHEMA',
  url_prefix = URL_SCHEMA,
  metadata_path = RELPATH_SCHEMA,
  targets_path = RELPATH_SCHEMA,
  confined_target_dirs = SCHEMA.Optional(RELPATHS_SCHEMA), # should now default to ['']
  custom = SCHEMA.Optional(SCHEMA.Object()))

# A dictionary of mirrors where the dict keys hold the mirror's name
# and the dict values the mirror's data (i.e., 'MIRROR_SCHEMA').
# The SingleRepoUpdater class of 'updater.py' accepts dictionaries of this
# type.
MIRRORDICT_SCHEMA = SCHEMA.DictOf(
  key_schema = SCHEMA.AnyString(),
  value_schema = MIRROR_SCHEMA)

# # A dictionary of mirror dictionaries, one mirror dictionary per repository.
# # The Updater class of 'updater.py' accepts dictionaries of this type.
# MULTIREPO_MIRRORDICT_SCHEMA = SCHEMA.DictOf(
#   key_schema = SCHEMA.AnyString(), # Repository name
#   value_schema = MIRRORDICT_SCHEMA)

# A Mirrorlist: indicates all the live mirrors, and what documents they
# serve.  The '_type' entry must be the string 'Mirrors'.
MIRRORLIST_SCHEMA = SCHEMA.Object(
  object_name = 'MIRRORLIST_SCHEMA',
  _type = SCHEMA.String('Mirrors'),
  version = METADATAVERSION_SCHEMA,
  expires = ISO8601_DATETIME_SCHEMA,
  mirrors = SCHEMA.ListOf(MIRROR_SCHEMA))

# Per tentative design for pinned.json. To replace MIRROR_SCHEMA et al. when
# confirmed.  In this design a mirror is simply a URL.
ALT_MIRROR_SCHEMA = URL_SCHEMA

# A list of ALT_MIRROR_SCHEMA.
ALT_MIRRORLIST_SCHEMA = SCHEMA.ListOf(ALT_MIRROR_SCHEMA)

# A string holding the name of a repository.
REPOSITORY_NAME_SCHEMA = SCHEMA.AnyString()

# A repository listing inside pinned.json.  Only the 'mirrors' entry is
# currently declared; the other entries below are commented out.
PINNED_REPOSITORY_SCHEMA = SCHEMA.Object(
  #local_metadata_directory = SCHEMA.AnyString(), # path to client's local metadata directory
  #root_override_URLs = SCHEMA.Optional(ListOf(SCHEMA.AnyString())), # URLs for root files, optional
  mirrors = ALT_MIRRORLIST_SCHEMA)

# PINNED_REPOSITORIES_SCHEMA = SCHEMA.DictOf(
#   key_schema = REPOSITORY_NAME_SCHEMA,
#   value_schema = PINNED_REPOSITORY_SCHEMA)

# A delegation inside pinned.json: a list of path strings, a list of
# repository name strings, and an optional 'terminating' Boolean.
PINNING_DELEGATION_SCHEMA = SCHEMA.Object(
  paths = SCHEMA.ListOf(SCHEMA.AnyString()),
  repositories = SCHEMA.ListOf(SCHEMA.AnyString()),
  terminating = SCHEMA.Optional(SCHEMA.Boolean()))

# pinned.json: client-only file that determines which repository/repositories
# to use for which targets.  'repositories' maps repository names to
# PINNED_REPOSITORY_SCHEMA, and 'delegations' is a list of
# PINNING_DELEGATION_SCHEMA.
PINNING_FILE_SCHEMA = SCHEMA.Object(
    #object_name = 'PINNINGS_FILE_SCHEMA',
    repositories = SCHEMA.DictOf(
        key_schema = REPOSITORY_NAME_SCHEMA,
        value_schema = PINNED_REPOSITORY_SCHEMA),
    delegations = SCHEMA.ListOf(PINNING_DELEGATION_SCHEMA))


# Any of the role schemas (e.g., TIMESTAMP_SCHEMA, SNAPSHOT_SCHEMA, etc.)
# NOTE(review): The last alternative is MIRROR_SCHEMA (a single mirror), while
# the schema whose '_type' is 'Mirrors' is MIRRORLIST_SCHEMA -- confirm whether
# MIRRORLIST_SCHEMA was intended here.
ANYROLE_SCHEMA = SCHEMA.OneOf([ROOT_SCHEMA, TARGETS_SCHEMA, SNAPSHOT_SCHEMA,
                               TIMESTAMP_SCHEMA, MIRROR_SCHEMA])





class MetaFile(object):
  """
  <Purpose>
    Base class for all metadata file classes.
    Classes representing metadata files such as RootFile
    and SnapshotFile all inherit from MetaFile.  The
    __eq__ and __ne__ methods perform 'equal' and 'not equal' comparisons
    between metadata file objects, by comparing their 'info' attributes.

    Instances are not hashable ('__hash__' is set to None), since equality is
    defined in terms of the 'info' attribute rather than object identity.
  """

  # Holds the metadata fields of the object.  The base class leaves it as
  # None; the attribute lookup below treats it as a dict-like container
  # (presumably assigned by subclasses -- they are not visible here).
  info = None

  def __eq__(self, other):
    # Two metadata file objects are equal if their 'info' attributes are equal.
    return isinstance(other, MetaFile) and self.info == other.info

  __hash__ = None

  def __ne__(self, other):
    return not self.__eq__(other)


  def __getattr__(self, name):
    """
      Allow all metafile objects to have their interesting attributes
      referred to directly without the info dict. The info dict is just
      to be able to do the __eq__ comparison generically.

      Python only calls this method after the normal attribute lookup has
      failed.  An AttributeError is raised if 'name' is not a key of 'info',
      or if 'info' has not been set (i.e., it is still None), so that
      hasattr() and getattr() with a default value behave as expected.
    """

    info = self.info

    # 'info' is None until it is assigned.  Without this guard, the membership
    # test below would raise a TypeError instead of an AttributeError.
    if info is not None and name in info:
      return info[name]

    raise AttributeError(name)





def build_dict_conforming_to_schema(schema, **kwargs):
  """
  <Purpose>
    Build a new dictionary out of the given keyword arguments and verify that
    it conforms to 'schema' (for example, TIMESTAMP_SCHEMA from this module).

  <Arguments>
    schema:
      A schema object.  It only needs to provide a check_match() method.

    kwargs:
      The entries of the dictionary to build.  Each keyword becomes a dict key,
      and each argument value the corresponding dict value.

  <Exceptions>
    ValueError, if 'schema' has no 'check_match' attribute.

    Whatever schema.check_match() raises, if the new dictionary does not
    conform to 'schema'.

  <Side Effects>
    None.

  <Returns>
    The new dictionary, if it conforms to 'schema'.
  """

  # Duck typing is used to recognize a schema: anything that provides
  # 'check_match' is accepted.  No isinstance() check is made.
  looks_like_schema = hasattr(schema, 'check_match')

  if not looks_like_schema:
    raise ValueError(
        'The given "schema" does not seem to be a schema.  It has no '
        '"check_match" method.  Given schema: ' + repr(schema))

  # Copy the keyword arguments into the dictionary to be returned, and verify
  # it before handing it back to the caller.
  conforming_dict = dict(kwargs)
  schema.check_match(conforming_dict)

  return conforming_dict





# A dict holding the recognized schemas for the top-level roles.  The dict keys
# are the strings that the corresponding schemas declare for their '_type'
# entry, and the dict values the schemas themselves.
SCHEMAS_BY_TYPE = {
  'Root' : ROOT_SCHEMA,
  'Targets' : TARGETS_SCHEMA,
  'Snapshot' : SNAPSHOT_SCHEMA,
  'Timestamp' : TIMESTAMP_SCHEMA,
  'Mirrors' : MIRRORLIST_SCHEMA}





def datetime_to_unix_timestamp(datetime_object):
  """
  <Purpose>
    Convert 'datetime_object' (in datetime.datetime() format) to a Unix/POSIX
    timestamp.  For example, Python's time.time() returns a Unix timestamp, and
    includes the number of microseconds.  The timestamp returned by this
    function is an integer number of seconds: the microseconds of
    'datetime_object' are discarded.

    A naive 'datetime_object' (one without time zone information) is
    interpreted as a UTC time.  An aware 'datetime_object' is converted to UTC
    before the timestamp is computed.

    >>> datetime_object = datetime.datetime(1985, 10, 26, 1, 22)
    >>> timestamp = datetime_to_unix_timestamp(datetime_object)
    >>> timestamp
    499137720

  <Arguments>
    datetime_object:
      The datetime.datetime() object to convert to a Unix timestamp.

  <Exceptions>
    tuf.FormatError, if 'datetime_object' is not a datetime.datetime() object.

  <Side Effects>
    None.

  <Returns>
    A unix (posix) timestamp (e.g., 499137720).
  """

  # Is 'datetime_object' a datetime.datetime() object?
  # Raise 'tuf.FormatError' if not.
  if not isinstance(datetime_object, datetime.datetime):
    message = repr(datetime_object) + ' is not a datetime.datetime() object.'
    raise tuf.FormatError(message)

  # utctimetuple() applies the UTC offset of an aware datetime, and returns the
  # fields of a naive datetime unchanged.  timetuple() would ignore the time
  # zone of an aware datetime, and yield a timestamp that is off by its UTC
  # offset.  calendar.timegm() interprets the resulting time tuple as UTC.
  unix_timestamp = calendar.timegm(datetime_object.utctimetuple())

  return unix_timestamp





def unix_timestamp_to_datetime(unix_timestamp):
  """
  <Purpose>
    Build the datetime.datetime() object that corresponds to 'unix_timestamp',
    a POSIX time in UNIX_TIMESTAMP_SCHEMA format (i.e., the number of seconds
    since the epoch, January 1, 1970.)  The returned object is naive (it
    carries no time zone information), and its fields express a UTC time.

    >>> datetime_object = unix_timestamp_to_datetime(1445455680)
    >>> datetime_object
    datetime.datetime(2015, 10, 21, 19, 28)

  <Arguments>
    unix_timestamp:
      An integer representing the time (e.g., 1445455680).  Conformant to
      'tuf.formats.UNIX_TIMESTAMP_SCHEMA'.

  <Exceptions>
    tuf.FormatError, if 'unix_timestamp' is improperly formatted.

  <Side Effects>
    None.

  <Returns>
    A datetime.datetime() object corresponding to 'unix_timestamp'.
  """

  # Reject an improperly formatted 'unix_timestamp' before converting it.
  # check_match() raises 'tuf.FormatError' if there is a mismatch.
  UNIX_TIMESTAMP_SCHEMA.check_match(unix_timestamp)

  # time.gmtime() expresses the timestamp as a UTC 'time.struct_time'.
  # datetime.fromtimestamp() is avoided, since it returns a local datetime.
  # Only the first six fields are needed to build the datetime object.
  year, month, day, hour, minute, second = time.gmtime(unix_timestamp)[:6]

  return datetime.datetime(year, month, day, hour, minute, second)




def format_base64(data):
  """
  <Purpose>
    Encode 'data' in base64, and strip the trailing '=' padding and whitespace
    from the result.

  <Arguments>
    data:
      Binary or buffer of data to convert.

  <Exceptions>
    tuf.FormatError, if the base64 encoding fails or the argument
    is invalid.

  <Side Effects>
    None.

  <Returns>
    A base64-encoded string, without '=' padding or trailing whitespace.
  """

  try:
    # b2a_base64() returns bytes that end with a newline.
    encoded_bytes = binascii.b2a_base64(data)
    encoded_text = encoded_bytes.decode('utf-8')

    return encoded_text.rstrip('=\n ')

  except (TypeError, binascii.Error) as error:
    raise tuf.FormatError('Invalid base64 encoding: ' + str(error))





def parse_base64(base64_string):
  """
  <Purpose>
    Decode a base64 string whose trailing '=' padding (and whitespace) may have
    been omitted, such as the strings returned by format_base64().

  <Arguments>
    base64_string:
      A string holding a base64 value.

  <Exceptions>
    tuf.FormatError, if 'base64_string' is not a string, or if it cannot be
    parsed due to an invalid base64 encoding.

  <Side Effects>
    None.

  <Returns>
    A byte string representing the parsed base64 encoding of
    'base64_string'.
  """

  if not isinstance(base64_string, six.string_types):
    raise tuf.FormatError('Invalid argument: '+repr(base64_string))

  # Restore the '=' padding, so that the length of the string to decode is a
  # multiple of 4.
  remainder = len(base64_string) % 4
  if remainder != 0:
    base64_string = base64_string + ('=' * (4 - remainder))

  try:
    encoded_bytes = base64_string.encode('utf-8')

    return binascii.a2b_base64(encoded_bytes)

  except (TypeError, binascii.Error) as error:
    raise tuf.FormatError('Invalid base64 encoding: ' + str(error))





def make_signable(object):
  """
  <Purpose>
    Return the role metadata 'object' in 'SIGNABLE_SCHEMA' format.
    Unless 'object' is already a dict containing a 'signed' key (in which case
    it is returned as is), 'object' is stored under the 'signed' key of a new
    dict, and an empty list under its 'signatures' key.  The caller adds
    signatures to this second field.
    Note: check_signable_object_format() should be called after
    make_signable() and signatures added to ensure the final
    signable object has a valid format (i.e., a signable containing
    a supported role metadata).

  <Arguments>
    object:
      A role schema dict (e.g., 'ROOT_SCHEMA', 'SNAPSHOT_SCHEMA').

  <Exceptions>
    None.

  <Side Effects>
    None.

  <Returns>
    A dict in 'SIGNABLE_SCHEMA' format.
  """

  # A dict that already has a 'signed' key is handed back untouched.
  has_signed_key = isinstance(object, dict) and 'signed' in object

  if has_signed_key:
    return object

  return {'signed': object, 'signatures': []}





def make_fileinfo(length, hashes, version=None, custom=None):
  """
  <Purpose>
    Build a dictionary conformant to 'FILEINFO_SCHEMA' out of the given
    arguments.  This dict describes both metadata and target files.

  <Arguments>
    length:
      An integer representing the size of the file.

    hashes:
      A dict of hashes in 'HASHDICT_SCHEMA' format, which has the form:
       {'sha256': '123df8a9b12', 'sha512': '324324dfc121', ...}

    version:
      An optional integer representing the version of the file.  It is left
      out of the returned dict if it is None.

    custom:
      An optional object providing additional information about the file.  It
      is left out of the returned dict if it is None.

  <Exceptions>
    tuf.FormatError, if the 'FILEINFO_SCHEMA' to be returned
    does not have the correct format.

  <Side Effects>
    None, other than the format check of the dict to be returned, which raises
    a 'tuf.FormatError' exception if any of the arguments are incorrectly
    formatted.

  <Returns>
    A dictionary conformant to 'FILEINFO_SCHEMA', representing the file
    information of a metadata or target file.
  """

  fileinfo = {'length': length, 'hashes': hashes}

  # The optional fields are only stored if the caller supplied them.
  optional_fields = (('version', version), ('custom', custom))

  for field_name, field_value in optional_fields:
    if field_value is not None:
      fileinfo[field_name] = field_value

  # check_match() raises 'tuf.FormatError' if 'fileinfo' is improperly
  # formatted.
  FILEINFO_SCHEMA.check_match(fileinfo)

  return fileinfo





def make_versioninfo(version_number):
  """
  <Purpose>
    Create a dictionary conformant to 'VERSIONINFO_SCHEMA'.  This dict holds
    the version number of a metadata role.

  <Arguments>
    version_number:
      An integer representing the version of a particular metadata role.
      The dictionary returned by this function is expected to be included
      in Snapshot metadata.

  <Exceptions>
    tuf.FormatError, if the dict to be returned does not have the correct
    format (i.e., VERSIONINFO_SCHEMA).

  <Side Effects>
    None.

  <Returns>
    A dictionary conformant to 'VERSIONINFO_SCHEMA', containing the version
    information of a metadata role.
  """

  versioninfo = {'version': version_number}

  # Raise 'tuf.FormatError' if 'versioninfo' is improperly formatted.  The
  # exception is simply allowed to propagate to the caller: wrapping the check
  # in a bare 'except' clause only to re-raise the exception adds nothing.
  VERSIONINFO_SCHEMA.check_match(versioninfo)

  return versioninfo



# TODO: Destroy this function. Use build_dict_conforming_to_schema instead.
def make_role_metadata(keyids, threshold, name=None, paths=None,
                       path_hash_prefixes=None):
  """
  <Purpose>
    Build a role dictionary from the given fields and verify that it conforms
    to 'tuf.formats.ROLE_SCHEMA'.  'keyids' and 'threshold' are always
    stored; 'name', 'paths', and 'path_hash_prefixes' are stored only when
    they are supplied (i.e., not None).

  <Arguments>
    keyids:
      A list of key ids.

    threshold:
      An integer denoting the number of required keys for the signing role.

    name:
      An optional string that is the name of this role.

    paths:
      An optional list of target file paths.  Cannot be combined with
      'path_hash_prefixes'.

    path_hash_prefixes:
      An optional, succinct way to describe a set of paths to target files.
      Cannot be combined with 'paths'.

  <Exceptions>
    tuf.FormatError, if both 'paths' and 'path_hash_prefixes' are given, or
    if the assembled role dict does not match 'ROLE_SCHEMA'.

  <Side Effects>
    None.

  <Returns>
    A role dict conforming to 'ROLE_SCHEMA'.
  """

  # According to the specification, 'paths' and 'path_hash_prefixes' are
  # mutually exclusive.  The schema checks do not always enforce this
  # (see #83), so it is enforced here before anything else is done.
  if not (paths is None or path_hash_prefixes is None):
    raise tuf.FormatError(
        'Both "paths" and "path_hash_prefixes" are specified.')

  metadata = {'keyids': keyids, 'threshold': threshold}

  if name is not None:
    metadata['name'] = name

  # At most one of the following two branches can run, thanks to the guard
  # above.
  if path_hash_prefixes is not None:
    metadata['path_hash_prefixes'] = path_hash_prefixes

  if paths is not None:
    metadata['paths'] = paths

  # Raise 'tuf.FormatError' if 'metadata' does not conform to
  # 'tuf.formats.ROLE_SCHEMA'.
  ROLE_SCHEMA.check_match(metadata)

  return metadata





def expected_meta_rolename(meta_rolename):
  """
  <Purpose>
    Validate 'meta_rolename' and return it with every whitespace-separated
    word capitalized.

    'targets' is returned as 'Targets'.
    'targets role1' is returned as 'Targets Role1'.

  <Arguments>
    meta_rolename:
      A string representing the rolename.
      E.g., 'root', 'targets'.

  <Exceptions>
    tuf.FormatError, if 'meta_rolename' is improperly formatted.

  <Side Effects>
    None.

  <Returns>
    A string (e.g., 'Root', 'Targets').
  """

  # Raise 'tuf.FormatError' if 'meta_rolename' does not conform to
  # 'tuf.formats.NAME_SCHEMA'.
  NAME_SCHEMA.check_match(meta_rolename)

  # 'string.capwords' splits on whitespace, capitalizes each word, and joins
  # the words back together with single spaces.
  capitalized_rolename = string.capwords(meta_rolename)

  return capitalized_rolename





def check_signable_object_format(object):
  """
  <Purpose>
    Ensure 'object' is properly formatted, conformant to
    'tuf.formats.SIGNABLE_SCHEMA'.  Return the signing role on success.
    Note: The 'signed' field of a 'SIGNABLE_SCHEMA' is checked against
    tuf.schema.Any().  The 'signed' field, however, should actually
    hold one of the supported role schemas (e.g., 'ROOT_SCHEMA',
    'TARGETS_SCHEMA').  The role schemas all differ in their format, so this
    function determines exactly which schema is listed in the 'signed'
    field, by looking up the value of its '_type' entry in 'SCHEMAS_BY_TYPE',
    and then checks the 'signed' field against that schema.

  <Arguments>
    object:
      The object to compare against 'SIGNABLE_SCHEMA'.

  <Exceptions>
    tuf.FormatError, if 'object' does not have the correct format.  This
    includes a 'signed' field that has no '_type' entry, a '_type' value
    that is not a recognized role type, and a 'signed' field that does not
    match the schema of its role type.

  <Side Effects>
    None.

  <Returns>
    A string representing the signing role (e.g., 'root', 'targets').
    The role string is returned with characters all lower case.
  """

  # Does 'object' have the correct type?
  # This check ensures 'object' conforms to
  # 'tuf.formats.SIGNABLE_SCHEMA'.
  SIGNABLE_SCHEMA.check_match(object)

  # The 'signed' field may hold anything at this point.  A 'KeyError' means
  # it is a dict without a '_type' entry; a 'TypeError' means it cannot be
  # indexed by a string at all (e.g., it is a list or an integer).
  try:
    role_type = object['signed']['_type']

  except (KeyError, TypeError):
    raise tuf.FormatError('Untyped object')

  # The value of '_type' is not validated yet either.  An unknown value
  # raises 'KeyError'; an unhashable value (e.g., a list or dict) raises
  # 'TypeError' on lookup.  Both are reported as a format error rather than
  # being allowed to escape as an unrelated exception.
  try:
    schema = SCHEMAS_BY_TYPE[role_type]

  except (KeyError, TypeError):
    raise tuf.FormatError('Unrecognized type ' + repr(role_type))

  # 'tuf.FormatError' raised if 'object' does not have a properly
  # formatted role schema.
  schema.check_match(object['signed'])

  return role_type.lower()





def _canonical_string_encoder(string):
  """
  <Purpose>
    Encode 'string' to canonical string format.
    
  <Arguments>
    string:
      The string to encode.

  <Exceptions>
    None.

  <Side Effects>
    None.

  <Returns>
    A string with the canonical-encoded 'string' embedded.
  """

  string = '"%s"' % re.sub(r'(["\\])', r'\\\1', string)
 
  return string





def _encode_canonical(object, output_function):
  # Helper for encode_canonical.  Older versions of json.encoder don't
  # even let us replace the separators.
  #
  # Recursively walks 'object' and hands each piece of its canonical
  # encoding to 'output_function', one call per piece, in output order.
  # Raises 'tuf.FormatError' for any object that is not a string, True,
  # False, None, an integer, a tuple/list, or a dict.

  if isinstance(object, six.string_types):
    output_function(_canonical_string_encoder(object))
    return

  # The singletons are tested by identity, and before the integer test
  # below, because True and False are themselves integers in Python.
  for singleton, literal in ((True, "true"), (False, "false"), (None, "null")):
    if object is singleton:
      output_function(literal)
      return

  if isinstance(object, six.integer_types):
    output_function(str(object))
    return

  if isinstance(object, (tuple, list)):
    output_function("[")
    for position, element in enumerate(object):
      # A separator goes in front of every element except the first.
      if position:
        output_function(",")
      _encode_canonical(element, output_function)
    output_function("]")
    return

  if isinstance(object, dict):
    output_function("{")
    # Entries are emitted in sorted order so that the encoding of a dict
    # does not depend on its iteration order.
    for position, (key, value) in enumerate(sorted(six.iteritems(object))):
      if position:
        output_function(",")
      output_function(_canonical_string_encoder(key))
      output_function(":")
      _encode_canonical(value, output_function)
    output_function("}")
    return

  raise tuf.FormatError('I cannot encode '+repr(object))





def encode_canonical(object, output_function=None):
  """
  <Purpose>
    Encode 'object' in canonical JSON form, as specified at
    http://wiki.laptop.org/go/Canonical_JSON .  It's a restricted
    dialect of JSON in which keys are always lexically sorted,
    there is no whitespace, floats aren't allowed, and only quote
    and backslash get escaped.  The pieces of the encoding are passed, one
    at a time, to 'output_function' (if provided), or joined into a string
    and returned.

    Note: This function should be called prior to computing the hash or
    signature of a JSON object in TUF.  For example, generating a signature
    of a signing role object such as 'ROOT_SCHEMA' is required to ensure
    repeatable hashes are generated across different json module versions
    and platforms.  Code elsewhere is free to dump JSON objects in any format
    they wish (e.g., utilizing indentation and single quotes around object
    keys).  These objects are only required to be in "canonical JSON" format
    when their hashes or signatures are needed.

    >>> encode_canonical("")
    '""'
    >>> encode_canonical([1, 2, 3])
    '[1,2,3]'
    >>> encode_canonical([])
    '[]'
    >>> encode_canonical({"A": [99]})
    '{"A":[99]}'
    >>> encode_canonical({"x" : 3, "y" : 2})
    '{"x":3,"y":2}'

  <Arguments>
    object:
      The object to be encoded.

    output_function:
      An optional callable that receives each piece of the encoding as its
      single argument (e.g., output_function('result')).

  <Exceptions>
    tuf.FormatError, if 'object' cannot be encoded or 'output_function'
    is not callable.

  <Side Effects>
    The results are fed to 'output_function()' if 'output_function' is set.

  <Returns>
    A string representing the 'object' encoded in canonical JSON form, if
    'output_function' is not set.  Otherwise, None.
  """

  # With no 'output_function' supplied, the encoded pieces are gathered in
  # a local list whose 'append' method stands in for the callback.
  collected = None

  if output_function is None:
    collected = []
    output_function = collected.append

  try:
    _encode_canonical(object, output_function)

  except (TypeError, tuf.FormatError) as error:
    raise tuf.FormatError(
        'Could not encode ' + repr(object) + ': ' + str(error))

  # The caller supplied its own 'output_function' and has already received
  # every piece, so there is nothing to return.
  if collected is None:
    return None

  return ''.join(collected)





if __name__ == '__main__':
  # Runs only when this file is executed directly as a script, not when it
  # is imported.  'doctest.testmod()' then executes the interactive examples
  # embedded in this module's documentation strings.
  # Usage:
  #   python -B formats.py
  import doctest
  doctest.testmod()
